package com.smartandroid.sa.tag.safety;

import com.smartandroid.sa.tag.helper.Validate;
import com.smartandroid.sa.tag.nodes.Attribute;
import com.smartandroid.sa.tag.nodes.Attributes;
import com.smartandroid.sa.tag.nodes.Document;
import com.smartandroid.sa.tag.nodes.Element;
import com.smartandroid.sa.tag.nodes.Node;
import com.smartandroid.sa.tag.nodes.TextNode;
import com.smartandroid.sa.tag.parser.Tag;
import com.smartandroid.sa.tag.select.NodeTraversor;
import com.smartandroid.sa.tag.select.NodeVisitor;

/**
 * The whitelist based HTML cleaner. Use to ensure that end-user provided HTML
 * contains only the elements and attributes that you are expecting; no junk,
 * and no cross-site scripting attacks!
 * <p/>
 * The HTML cleaner parses the input as HTML and then runs it through a
 * white-list, so the output HTML can only contain HTML that is allowed by the
 * whitelist.
 * <p/>
 * It is assumed that the input HTML is a body fragment; the clean methods only
 * pull from the source's body, and the canned white-lists only allow body
 * contained tags.
 * <p/>
 * Rather than interacting directly with a Cleaner object, generally see the
 * {@code clean} methods in {@link org.SmartTag.Jsoup}.
 */
public class Cleaner {
	/** White-list consulted for every tag and attribute; never reassigned. */
	private final Whitelist whitelist;

	/**
	 * Create a new cleaner, that sanitizes documents using the supplied
	 * whitelist.
	 * 
	 * @param whitelist
	 *            white-list to clean with; must not be null (checked via
	 *            {@code Validate.notNull})
	 */
	public Cleaner(Whitelist whitelist) {
		Validate.notNull(whitelist);
		this.whitelist = whitelist;
	}

	/**
	 * Creates a new, clean document, from the original dirty document,
	 * containing only elements allowed by the whitelist. Only nodes found
	 * under the dirty document's <code>body</code> are copied; they are
	 * re-created in a fresh shell document rather than moved.
	 * 
	 * @param dirtyDocument
	 *            Untrusted base document to clean; must not be null.
	 * @return cleaned document. If the dirty document has no body (e.g. a
	 *         frameset document), the returned shell is left as created.
	 */
	public Document clean(Document dirtyDocument) {
		Validate.notNull(dirtyDocument);

		Document clean = Document.createShell(dirtyDocument.baseUri());
		Element dirtyBody = dirtyDocument.body();
		// frameset documents won't have a body; in that case nothing is
		// copied into the clean shell.
		if (dirtyBody != null)
			copySafeNodes(dirtyBody, clean.body());

		return clean;
	}

	/**
	 * Determines if the input document is valid, against the whitelist. It is
	 * considered valid if nothing under the input's <code>body</code> had to
	 * be discarded while copying it through the whitelist.
	 * <p/>
	 * This method can be used as a validator for user input forms. An invalid
	 * document will still be cleaned successfully using the
	 * {@link #clean(Document)} method. If using as a validator, it is
	 * recommended to still clean the document to ensure enforced attributes
	 * are set correctly, and that the output is tidied.
	 * <p/>
	 * Only the body is inspected. A document without a body (e.g. a frameset
	 * document) has no body content to discard and is therefore reported as
	 * valid; callers that must reject such documents should check
	 * {@code dirtyDocument.body()} themselves.
	 * 
	 * @param dirtyDocument
	 *            document to test; must not be null
	 * @return true if no tags, attributes or other nodes need to be removed
	 *         from the body; false if they do
	 */
	public boolean isValid(Document dirtyDocument) {
		Validate.notNull(dirtyDocument);

		Element dirtyBody = dirtyDocument.body();
		// Mirror the guard in clean(Document): never hand a null root to the
		// traversal.
		if (dirtyBody == null)
			return true;

		Document clean = Document.createShell(dirtyDocument.baseUri());
		int numDiscarded = copySafeNodes(dirtyBody, clean.body());
		return numDiscarded == 0;
	}

	/**
	 * Iterates the input and copies trusted nodes (tags, attributes, text) into
	 * the destination, counting everything that was left behind.
	 */
	private final class CleaningVisitor implements NodeVisitor {
		// running total of dropped elements, attributes and other nodes
		private int numDiscarded = 0;
		// traversal root; an unsafe root is not counted as a discard
		private final Element root;
		private Element destination; // current element to append nodes to

		private CleaningVisitor(Element root, Element destination) {
			this.root = root;
			this.destination = destination;
		}

		/**
		 * Called when a source node is entered. Safe elements are re-created
		 * under the current destination, which then becomes the new element;
		 * text nodes are copied; anything else is counted as discarded.
		 */
		public void head(Node source, int depth) {
			if (source instanceof Element) {
				Element sourceEl = (Element) source;

				if (whitelist.isSafeTag(sourceEl.tagName())) {
					// safe: clone the element with its safe attributes and
					// descend into the copy
					ElementMeta meta = createSafeElement(sourceEl);
					Element destChild = meta.el;
					destination.appendChild(destChild);

					numDiscarded += meta.numAttribsDiscarded;
					destination = destChild;
				} else if (source != root) {
					// not a safe tag, so don't add it. The destination is left
					// unchanged, so any nodes visited next are still appended
					// to the current destination. The root itself is not
					// counted against the discard total.
					numDiscarded++;
				}
			} else if (source instanceof TextNode) {
				TextNode sourceText = (TextNode) source;
				TextNode destText = new TextNode(sourceText.getWholeText(),
						source.baseUri());
				destination.appendChild(destText);
			} else {
				// comments, xml processing instructions, etc. are not copied
				numDiscarded++;
			}
		}

		/**
		 * Called when a source node is left. If head descended into a copy of
		 * this element, step the destination back up to its parent.
		 */
		public void tail(Node source, int depth) {
			if (source instanceof Element
					&& whitelist.isSafeTag(source.nodeName())) {
				// would have descended in head, so pop the destination stack
				destination = destination.parent();
			}
		}
	}

	/**
	 * Runs a {@link CleaningVisitor} over {@code source}, appending the safe
	 * copies to {@code dest}.
	 * 
	 * @param source
	 *            root of the untrusted tree to walk
	 * @param dest
	 *            element that receives the copied nodes
	 * @return number of elements, attributes and other nodes that were
	 *         discarded
	 */
	private int copySafeNodes(Element source, Element dest) {
		CleaningVisitor cleaningVisitor = new CleaningVisitor(source, dest);
		NodeTraversor traversor = new NodeTraversor(cleaningVisitor);
		traversor.traverse(source);
		return cleaningVisitor.numDiscarded;
	}

	/**
	 * Builds a new element with the same tag name and base URI as
	 * {@code sourceEl}, carrying only the attributes the whitelist accepts,
	 * followed by the whitelist's enforced attributes for that tag.
	 * 
	 * @param sourceEl
	 *            untrusted element to copy
	 * @return the new element together with the count of attributes that
	 *         were not copied
	 */
	private ElementMeta createSafeElement(Element sourceEl) {
		String sourceTag = sourceEl.tagName();
		Attributes destAttrs = new Attributes();
		Element dest = new Element(Tag.valueOf(sourceTag), sourceEl.baseUri(),
				destAttrs);
		int numDiscarded = 0;

		Attributes sourceAttrs = sourceEl.attributes();
		for (Attribute sourceAttr : sourceAttrs) {
			if (whitelist.isSafeAttribute(sourceTag, sourceEl, sourceAttr))
				destAttrs.put(sourceAttr);
			else
				numDiscarded++;
		}
		// enforced attributes are added after the copied ones
		Attributes enforcedAttrs = whitelist.getEnforcedAttributes(sourceTag);
		destAttrs.addAll(enforcedAttrs);

		return new ElementMeta(dest, numDiscarded);
	}

	/**
	 * Simple holder pairing a freshly created safe element with the number of
	 * source attributes that were dropped while creating it.
	 */
	private static class ElementMeta {
		final Element el;
		final int numAttribsDiscarded;

		ElementMeta(Element el, int numAttribsDiscarded) {
			this.el = el;
			this.numAttribsDiscarded = numAttribsDiscarded;
		}
	}

}
